We’re going to look at NOAA weather data for New York.
library(tidyverse)
library(p8105.datasets)
library(plotly)
library(dplyr)
# Load the ny_noaa dataset, split `date` into integer year / month / day
# columns, and convert the temperature columns to numeric values divided
# by 10.
# NOTE(review): the division by 10 presumably converts tenths of a degree
# to whole degrees -- confirm against the dataset documentation.
data("ny_noaa")

ny_noaa_fixed <-
  ny_noaa %>%
  separate(date, into = c("year", "month", "day"), sep = "-") %>%
  mutate(
    year = as.integer(year),
    month = as.integer(month),
    day = as.integer(day),
    tmax = as.numeric(tmax) / 10,
    tmin = as.numeric(tmin) / 10
  )
# Print the observations whose year is later than 2009.
filter(ny_noaa_fixed, year > 2009)
## # A tibble: 159,671 x 9
## id year month day prcp snow snwd tmax tmin
## <chr> <int> <int> <int> <int> <int> <int> <dbl> <dbl>
## 1 US1NYAB0001 2010 1 1 33 28 NA NA NA
## 2 US1NYAB0001 2010 1 2 28 84 NA NA NA
## 3 US1NYAB0001 2010 1 3 56 58 NA NA NA
## 4 US1NYAB0001 2010 1 4 38 38 NA NA NA
## 5 US1NYAB0001 2010 1 5 0 0 NA NA NA
## 6 US1NYAB0001 2010 1 6 0 0 NA NA NA
## 7 US1NYAB0001 2010 1 7 0 0 NA NA NA
## 8 US1NYAB0001 2010 1 8 0 0 NA NA NA
## 9 US1NYAB0001 2010 1 9 36 51 NA NA NA
## 10 US1NYAB0001 2010 1 10 0 0 NA NA NA
## # … with 159,661 more rows
# Scratch calculation: half of the 159,671 rows kept by the year > 2009 filter.
159671 / 2
## [1] 79835.5
# Draw a random sample of 500 rows from the cleaned data.
smaller_df <- ny_noaa_fixed %>% sample_n(size = 500)
# Interactive scatter plot of tmax against tmin for years after 2009.
# Points are colored by month, and each point carries a hover label with
# its month and year.
ny_noaa_fixed %>%
  filter(year > 2009) %>%
  mutate(text_label = str_c("Month: ", month, "\nYear: ", year)) %>%
  plot_ly(
    x = ~tmax, y = ~tmin, color = ~factor(month), text = ~text_label,
    # factor(month) has up to 12 levels, more than the 8 colors of plotly's
    # default qualitative palette (Set2), which triggers RColorBrewer
    # "n too large" warnings. An explicit viridis scale avoids them and
    # matches the other plotly charts in this file.
    colors = "viridis",
    alpha = .5, type = "scatter", mode = "markers"
  )
## Warning: Ignoring 108204 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Horizontal box plots of tmax for each year, split into January and July.
ny_noaa_fixed %>%
  # Use %in% for set membership. `month == c(1, 7)` recycles the length-2
  # vector along the rows (row 1 vs 1, row 2 vs 7, row 3 vs 1, ...), which
  # silently drops roughly half of the January and July observations.
  filter(month %in% c(1, 7)) %>%
  mutate(
    year = factor(year),
    # After the filter, month is either 1 or 7, so anything that is not
    # January is July.
    month = ifelse(month == 1, "January", "July")
  ) %>%
  plot_ly(
    y = ~year, x = ~tmax, color = ~month,
    type = "box", colors = "viridis"
  ) %>%
  layout(
    title = "NY Weather Over the Years in 2 Months "
  )
## Warning: Ignoring 95158 observations
# Bar chart counting how often each positive snowfall value occurs in
# December through March for years after 2004.
ny_noaa_fixed %>%
  filter(
    year > 2004,
    month %in% c(12, 1, 2, 3),
    snow > 0
  ) %>%
  count(snow) %>%
  # snow becomes a factor so each distinct value gets its own bar and color.
  # An alternative ordering by count, fct_reorder(factor(snow), n), was left
  # disabled by the author.
  mutate(snow = factor(snow)) %>%
  plot_ly(
    x = ~snow, y = ~n, color = ~snow,
    type = "bar", colors = "viridis"
  ) %>%
  layout(
    title = "Daily Snow fall (mm) in Winter Months of Years 2005-2010 "
  )
# Build a static ggplot scatter of nyc_airbnb (lat on the x-axis, long on
# the y-axis, points colored by price), then convert it to an interactive
# plotly widget.
# NOTE(review): nyc_airbnb is not loaded in the visible part of this file --
# confirm it is available here (e.g. via data("nyc_airbnb")).
ggp_scatter <- ggplot(nyc_airbnb, aes(x = lat, y = long, color = price)) +
  geom_point()

ggplotly(ggp_scatter)